using System;
using System.IO;
using System.Runtime.InteropServices;
using DynaPDF;

namespace text_extraction2
{
	public class CPDFToText
	{
      // Tolerance used by IsPointOnLine(): the SQUARED distance between a point and the
      // reference line segment must be below this value for the point to count as "on the line".
      // Keeping the squared value avoids a square root in the comparison.
      private const double MAX_LINE_ERROR = 4.0; // This must be the square of the allowed error (2 * 2 in this case).

      // Snapshot of the graphics state values tracked by this class. Instances are copied
      // onto and off the CStack by SaveGState() / RestoreGState().
      protected struct TGState
      {
         // Font handle as passed to SetFont(); IntPtr.Zero until a font was set.
         public IntPtr    ActiveFont;
         // Value stored by SetCharSpacing().
         public float     CharSpacing;
         // Font size as passed to SetFont(); AddText() uses it to find the text direction.
         public double    FontSize;
         // Font type as passed to SetFont().
         public TFontType FontType;
         // Current transformation matrix; extended by MulMatrix() and BeginTemplate().
         public TCTM      Matrix;
         // Space width obtained from CPDF.GetSpaceWidth() in SetFont() (negated for negative
         // font sizes); AddText() uses it to decide where spaces must be inserted.
         public float     SpaceWidth;
         // Value stored by SetTextDrawMode().
         public TDrawMode TextDrawMode;
         // Value stored by SetTextScale(); initialized to 100.0f.
         public float     TextScale;
         // Value stored by SetWordSpacing().
         public float     WordSpacing;
      }
      // Simple growable LIFO stack of graphics states. The backing array grows in
      // fixed steps and is never shrunk.
      protected class CStack
      {
         // Number of slots added whenever the backing array is full.
         private const uint GROW_STEP = 16;

         private uint      m_Capacity;
         private uint      m_Count;
         private TGState[] m_Items;

         // Pops the most recently saved state into F.
         // Returns false (leaving F untouched) when the stack is empty.
         public bool Restore(ref TGState F)
         {
            if (m_Count == 0)
            {
               return false;
            }
            m_Count--;
            F = m_Items[m_Count];
            return true;
         }

         // Pushes a copy of F onto the stack.
         // Returns 0 on success or -1 when the backing array could not be enlarged;
         // in the failure case the stack is left unchanged.
         public int Save(ref TGState F)
         {
            if (m_Count == m_Capacity)
            {
               uint grownCapacity = m_Capacity + GROW_STEP;
               try
               {
                  TGState[] grownItems = new TGState[grownCapacity];
                  if (m_Items != null)
                  {
                     m_Items.CopyTo(grownItems, 0);
                  }
                  m_Items = grownItems;
               }
               catch
               {
                  // Allocation or copy failed: keep the old capacity and report the error.
                  return -1;
               }
               m_Capacity = grownCapacity;
            }
            m_Items[m_Count] = F;
            m_Count++;
            return 0;
         }
      }

      /// <summary>
      /// Creates the extractor with a default graphics state (identity matrix, font size 1,
      /// text scale 100) and an empty graphics state stack. No output file is opened here.
      /// </summary>
      /// <param name="PDFInst">PDF instance that is later queried for the space width of a font.</param>
      internal CPDFToText(CPDF PDFInst)
      {
         m_PDF   = PDFInst;
         m_Stack = new CStack();

         // Identity transformation
         TCTM identity = new TCTM();
         identity.a = 1.0;
         identity.b = 0.0;
         identity.c = 0.0;
         identity.d = 1.0;
         identity.x = 0.0;
         identity.y = 0.0;

         TGState initial = new TGState();
         initial.ActiveFont   = IntPtr.Zero;
         initial.CharSpacing  = 0.0f;
         initial.FontSize     = 1.0;
         initial.FontType     = TFontType.ftType1;
         initial.Matrix       = identity;
         initial.SpaceWidth   = 0.0f;
         initial.TextDrawMode = TDrawMode.dmNormal;
         initial.TextScale    = 100.0f;
         initial.WordSpacing  = 0.0f;
         m_GState = initial;

         // No text record was processed yet.
         m_LastTextDir = TTextDir.tfNotInitialized;
      }

      /// <summary>
      /// Writes one text record to the output file as UTF-16 text. A line break is written first
      /// when the record has a different direction than the previous one or does not start on the
      /// previous text line; a space is written when the gap to the previous record is wider than
      /// the space width of the current font.
      /// </summary>
      /// <param name="Matrix">Text matrix of the record; it is combined with the current transformation matrix.</param>
      /// <param name="Source">Not used by this method.</param>
      /// <param name="Kerning">Kerning records holding the Unicode text and the advance of each sub string.</param>
      /// <param name="Count">Number of entries of Kerning to process.</param>
      /// <param name="Width">Width of the whole record, measured in text space.</param>
      /// <param name="Decoded">When false the record is ignored and 0 is returned.</param>
      /// <returns>0 on success (or when the record was ignored), -1 when any exception occurred.</returns>
      public int AddText(TCTM Matrix, TTextRecordA[] Source, TTextRecordW[] Kerning, int Count, double Width, bool Decoded)
      {
         if (!Decoded)
         {
            return 0;
         }
         try
         {
            System.Text.Encoding unicode = System.Text.Encoding.Unicode;

            // Text space -> user space
            TCTM userSpace = MulMatrix(m_GState.Matrix, Matrix);

            // Origin of the text record in user space
            double startX = 0.0;
            double startY = 0.0;
            Transform(userSpace, ref startX, ref startY);

            // A second point, one font size above the origin. Together with the origin it
            // tells us in which direction the text runs. (The distance between both points
            // would also be the visible font size measured in user space.)
            double upX = 0.0;
            double upY = m_GState.FontSize;
            Transform(userSpace, ref upX, ref upY);

            TTextDir direction;
            if (startY == upY)
            {
               // The up vector is horizontal, so the text itself runs vertically.
               direction = (startX > upX) ? TTextDir.tfBottomToTop : TTextDir.tfTopToBottom;
            }
            else
            {
               direction = (startY > upY) ? TTextDir.tfRightToLeft : TTextDir.tfLeftToRight;
            }

            bool continuesLine = direction == m_LastTextDir
                              && IsPointOnLine(startX, startY, m_LastTextEndX, m_LastTextEndY, m_LastTextInfX, m_LastTextInfY);
            if (continuesLine)
            {
               // The space width is measured in text space while the gap between two records is
               // measured in user space, so the space width must be transformed before comparing.
               // The full space width is used here because the stored end position of the last
               // record was already reduced by the half space width.
               double spaceEndX = m_GState.SpaceWidth;
               double spaceEndY = 0.0;
               Transform(userSpace, ref spaceEndX, ref spaceEndY);

               double spaceWidth = CalcDistance(startX, startY, spaceEndX, spaceEndY);
               double gap        = CalcDistance(m_LastTextEndX, m_LastTextEndY, startX, startY);
               if (gap > spaceWidth)
               {
                  m_File.Write(unicode.GetBytes(" "));
               }
            }
            else
            {
               // New text line: remember a far away point on its baseline so that following
               // records can be tested against the line.
               m_LastTextInfX = 1000000.0;
               m_LastTextInfY = 0.0;
               Transform(userSpace, ref m_LastTextInfX, ref m_LastTextInfY);
               if (m_LastTextDir != TTextDir.tfNotInitialized)
               {
                  m_File.Write(unicode.GetBytes("\r\n"));
               }
            }

            // Inside a record the half space width is the threshold for inserting a space;
            // this produces better results in most cases. Note that the value is negative.
            float halfSpace = -m_GState.SpaceWidth * 0.5f;
            for (int index = 0; index < Count; ++index)
            {
               TTextRecordW record = Kerning[index];
               if (record.Advance < halfSpace)
               {
                  m_File.Write(unicode.GetBytes(" "));
               }
               string text = Marshal.PtrToStringUni(record.Text, record.Length);
               m_File.Write(unicode.GetBytes(text));
            }

            // The cursor is not placed at the real end of the string: applications like MS Word
            // often append a space to a record that can slightly overlap the next record, and
            // IsPointOnLine() would return false in that case.
            m_LastTextEndX = Width + halfSpace;
            m_LastTextEndY = 0.0;
            m_LastTextDir  = direction;
            // End coordinate of the record in user space
            Transform(userSpace, ref m_LastTextEndX, ref m_LastTextEndY);
            return 0;
         }
         catch
         {
            return -1;
         }
      }

      /// <summary>
      /// Saves the current graphics state and, when a template matrix is supplied,
      /// concatenates it with the current transformation matrix.
      /// </summary>
      /// <param name="BBox">Not used by this method.</param>
      /// <param name="Matrix">Pointer to a TCTM structure, or IntPtr.Zero when the template has no matrix.</param>
      /// <returns>0 on success, -1 when the graphics state could not be saved.</returns>
      public int BeginTemplate(TPDFRect BBox, IntPtr Matrix)
      {
         int saveResult = SaveGState();
         if (saveResult < 0)
         {
            return -1;
         }
         if (Matrix == IntPtr.Zero)
         {
            return 0;
         }
         TCTM templateMatrix = (TCTM)Marshal.PtrToStructure(Matrix, typeof(TCTM));
         m_GState.Matrix = MulMatrix(m_GState.Matrix, templateMatrix);
         return 0;
      }

      /// <summary>Returns the Euclidean distance between the points (x1, y1) and (x2, y2).</summary>
      private double CalcDistance(double x1, double y1, double x2, double y2)
      {
         double deltaX = x2 - x1;
         double deltaY = y2 - y1;
         double squaredLength = deltaX * deltaX + deltaY * deltaY;
         return Math.Sqrt(squaredLength);
      }

      /// <summary>
      /// Flushes and closes the output file. Calling this method when no file is open
      /// (Open() was never called, or Close() was already called) does nothing.
      /// </summary>
      public void Close()
      {
         BinaryWriter file   = m_File;
         FileStream   stream = m_Stream;
         // Drop the references first so the object is in a clean state even if closing fails.
         m_File   = null;
         m_Stream = null;

         if (file != null)
         {
            try
            {
               file.Flush();
            }
            finally
            {
               // Closing the writer also closes the underlying file stream.
               file.Close();
            }
         }
         else if (stream != null)
         {
            // A stream without a writer can be left behind when Open() failed half way.
            stream.Close();
         }
      }

      /// <summary>
      /// Restores the graphics state that was saved by the matching BeginTemplate() call.
      /// An empty stack is silently ignored.
      /// </summary>
      public void EndTemplate()
      {
         m_Stack.Restore(ref m_GState);
      }

      /// <summary>
      /// Resets the extractor: empties the graphics state stack, restores the default
      /// graphics state and forgets the position of the last text record. The output
      /// file is not touched.
      /// </summary>
      public void Init()
      {
         // Unwind everything that is still on the stack.
         bool restored;
         do
         {
            restored = RestoreGState();
         } while (restored);

         // Identity transformation
         TCTM identity = new TCTM();
         identity.a = 1.0;
         identity.b = 0.0;
         identity.c = 0.0;
         identity.d = 1.0;
         identity.x = 0.0;
         identity.y = 0.0;

         TGState defaults = new TGState();
         defaults.ActiveFont   = IntPtr.Zero;
         defaults.CharSpacing  = 0.0f;
         defaults.FontSize     = 1.0;
         defaults.FontType     = TFontType.ftType1;
         defaults.Matrix       = identity;
         defaults.SpaceWidth   = 0.0f;
         defaults.TextDrawMode = TDrawMode.dmNormal;
         defaults.TextScale    = 100.0f;
         defaults.WordSpacing  = 0.0f;
         m_GState = defaults;

         m_LastTextDir = TTextDir.tfNotInitialized;
      }

      /// <summary>
      /// Tests whether the point (x, y) lies on the line segment (x0, y0) - (x1, y1), allowing
      /// a small error. The point is projected onto the segment (clamped to its end points) and
      /// the squared distance to that projection is compared against MAX_LINE_ERROR.
      /// </summary>
      /// <returns>true when the squared distance is below MAX_LINE_ERROR.</returns>
      private bool IsPointOnLine(double x, double y, double x0, double y0, double x1, double y1)
      {
         // Work relative to the start point of the segment.
         double px = x - x0;
         double py = y - y0;
         double sx = x1 - x0;
         double sy = y1 - y0;

         // Position of the projection along the segment: 0 = start point, 1 = end point.
         double t = 0.0;
         double squaredLength = sx * sx + sy * sy;
         if (squaredLength > 0.0)
         {
            t = (px * sx + py * sy) / squaredLength;
            t = (t < 0.0) ? 0.0 : (t > 1.0) ? 1.0 : t;
         }
         // else: the segment is degenerated to a single point. Dividing by its zero length
         // would produce NaN and the test would always fail, so the distance to the start
         // point is measured instead.

         double ex = px - t * sx;
         double ey = py - t * sy;
         return (ex * ex + ey * ey) < MAX_LINE_ERROR;
      }

      /// <summary>
      /// Concatenates Matrix with the current transformation matrix of the graphics state.
      /// </summary>
      /// <param name="Matrix">Matrix that is applied before the current transformation matrix.</param>
      public void MulMatrix(TCTM Matrix)
      {
         TCTM concatenated = MulMatrix(m_GState.Matrix, Matrix);
         m_GState.Matrix = concatenated;
      }

      /// <summary>
      /// Multiplies two transformation matrices. Transforming a point with the result is the
      /// same as transforming it with M2 first and with M1 afterwards.
      /// </summary>
      private TCTM MulMatrix(TCTM M1, TCTM M2)
      {
         TCTM product;
         // Linear part
         product.a = M1.a * M2.a + M1.c * M2.b;
         product.b = M1.b * M2.a + M1.d * M2.b;
         product.c = M1.a * M2.c + M1.c * M2.d;
         product.d = M1.b * M2.c + M1.d * M2.d;
         // Translation: the offset of M2 is mapped through M1, then the offset of M1 is added.
         product.x = M1.a * M2.x + M1.c * M2.y + M1.x;
         product.y = M1.b * M2.x + M1.d * M2.y + M1.y;
         return product;
      }

      /// <summary>
      /// Creates (or overwrites) the output file and writes the UTF-16 Little Endian byte order
      /// mark. An output file that is still open from a previous call is closed first. When
      /// anything fails after the file was created, the file handle is released again and the
      /// exception is passed on to the caller.
      /// </summary>
      /// <param name="FileName">Path of the text file to create.</param>
      public void Open(String FileName)
      {
         // Do not leak the handle of a file that was opened before.
         if (m_File != null)
         {
            BinaryWriter previous = m_File;
            m_File   = null;
            m_Stream = null;
            previous.Close(); // closes the underlying stream too
         }
         else if (m_Stream != null)
         {
            FileStream previousStream = m_Stream;
            m_Stream = null;
            previousStream.Close();
         }

         FileStream stream = new FileStream(FileName, FileMode.Create, FileAccess.ReadWrite);
         try
         {
            BinaryWriter writer = new BinaryWriter(stream, System.Text.Encoding.Unicode);
            // Write a Little Endian marker to the file (byte order mark)
            writer.Write(System.Text.UnicodeEncoding.Unicode.GetBytes("\uFEFF"));
            m_Stream = stream;
            m_File   = writer;
         }
         catch
         {
            stream.Close();
            throw;
         }
      }

      /// <summary>Replaces the current graphics state with the one most recently saved.</summary>
      /// <returns>false when the stack was empty; the current state is unchanged in that case.</returns>
      public bool RestoreGState()
      {
         bool restored = m_Stack.Restore(ref m_GState);
         return restored;
      }

      /// <summary>Pushes a copy of the current graphics state onto the stack.</summary>
      /// <returns>0 on success, -1 when the stack could not be enlarged.</returns>
      public int SaveGState()
      {
         int result = m_Stack.Save(ref m_GState);
         return result;
      }

      /// <summary>Stores the character spacing in the current graphics state (as single precision value).</summary>
      public void SetCharSpacing(double Value)
      {
         float charSpacing = (float)Value;
         m_GState.CharSpacing = charSpacing;
      }

      /// <summary>
      /// Stores the active font in the current graphics state and caches the space width
      /// reported by the PDF instance for this font and size.
      /// </summary>
      /// <param name="FontSize">Font size; may be negative.</param>
      /// <param name="Type">Font type, stored as is.</param>
      /// <param name="Font">Font handle, stored as is and passed on to GetSpaceWidth().</param>
      public void SetFont(double FontSize, TFontType Type, IntPtr Font)
      {
         m_GState.ActiveFont = Font;
         m_GState.FontSize   = FontSize;
         m_GState.FontType   = Type;

         float spaceWidth = (float)m_PDF.GetSpaceWidth(Font, FontSize);
         // For a negative font size the sign of the reported width is flipped.
         m_GState.SpaceWidth = (FontSize < 0.0) ? -spaceWidth : spaceWidth;
      }

      /// <summary>Stores the text draw mode in the current graphics state.</summary>
      public void SetTextDrawMode(TDrawMode Mode)
      {
         TDrawMode drawMode = Mode;
         m_GState.TextDrawMode = drawMode;
      }

      /// <summary>Stores the text scaling in the current graphics state (as single precision value).</summary>
      public void SetTextScale(double Value)
      {
         float textScale = (float)Value;
         m_GState.TextScale = textScale;
      }

      /// <summary>Stores the word spacing in the current graphics state (as single precision value).</summary>
      public void SetWordSpacing(double Value)
      {
         float wordSpacing = (float)Value;
         m_GState.WordSpacing = wordSpacing;
      }

      /// <summary>
      /// Transforms the point (x, y) in place with the matrix M:
      /// x' = x * a + y * c + M.x and y' = x * b + y * d + M.y.
      /// </summary>
      private void Transform(TCTM M, ref double x, ref double y)
      {
         // Both results depend on both source coordinates, so keep the originals.
         double sourceX = x;
         double sourceY = y;
         x = sourceX * M.a + sourceY * M.c + M.x;
         y = sourceX * M.b + sourceY * M.d + M.y;
      }

      /// <summary>
      /// Writes a separator line containing the page number to the output file. For every
      /// page after the first one an additional line break is written in front of it.
      /// </summary>
      /// <param name="PageNum">Page number that is printed into the separator line.</param>
      public void WritePageIdentifier(int PageNum)
      {
         System.Text.Encoding unicode = System.Text.Encoding.Unicode;
         bool isFirstPage = !(PageNum > 1);
         if (!isFirstPage)
         {
            m_File.Write(unicode.GetBytes("\r\n"));
         }
         string separator = String.Format("%----------------------- Page {0} -----------------------------\r\n", PageNum);
         m_File.Write(unicode.GetBytes(separator));
      }

      // Direction of a text record as determined by AddText(). AddText() only compares the
      // value with the direction of the previous record to detect a change of direction.
      // The numeric values matter: AddText() computes them arithmetically (0 or 1 in one case,
      // 2 or 4 in the other), so they must not be renumbered.
      protected enum TTextDir
      {
         tfLeftToRight    = 0,
         tfRightToLeft    = 1,
         tfTopToBottom    = 2,
         tfBottomToTop    = 4,
         // No text record was processed since construction or the last Init() call.
         tfNotInitialized = -1
      }
      // Writer for the output text file; created by Open(), null after Close().
      protected BinaryWriter m_File;
      // Current graphics state; saved to / restored from m_Stack.
      protected TGState      m_GState;
      // NOTE(review): not referenced anywhere in the visible code.
      protected bool         m_HavePos;
      // Direction of the last text record, tfNotInitialized before the first record.
      protected TTextDir     m_LastTextDir;
      // End position of the last text record in user space (reduced by the half space width).
      protected double       m_LastTextEndX;
      protected double       m_LastTextEndY;
      // Far away point on the baseline of the current text line in user space; together with
      // the end position it forms the segment that IsPointOnLine() tests against.
      protected double       m_LastTextInfX;
      protected double       m_LastTextInfY;
      // PDF instance passed to the constructor; used by SetFont() to query the space width.
      internal  CPDF         m_PDF;
      // Stack of saved graphics states.
      protected CStack       m_Stack;
      // Underlying stream of m_File; created by Open(), null after Close().
      protected FileStream   m_Stream;
	}
}
